In [ ]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
#set background color grey
sns.set_theme(style="darkgrid")
In [ ]:
df = pd.read_csv("all_turns_2.csv")
df.drop(columns=['Unnamed: 0'], inplace=True)
df['turn_duration'] = 0.2*(df['end_idx'].astype('float') - df['start_idx'].astype('float'))
df.describe().T
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 210.0 1838.609524 619.424474 407.000000 2102.000000 2105.000000 2107.000000 2111.000000
path_num 210.0 2.247619 0.735705 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 210.0 2.642857 1.785393 1.000000 1.000000 2.000000 3.750000 10.000000
start_idx 210.0 539.523810 430.918594 50.000000 246.000000 435.500000 666.000000 2199.000000
end_idx 210.0 590.380952 430.636602 83.000000 296.250000 483.000000 712.000000 2264.000000
walking_direction_lag 210.0 -3.980952 24.546014 -151.000000 -12.000000 -1.000000 6.000000 107.000000
walking_direction_base_corr 210.0 0.075067 0.415642 -0.857578 -0.315052 0.093226 0.413633 0.941918
walking_direction_lagged_corr 210.0 0.470502 0.163364 0.122957 0.348448 0.442860 0.570997 0.955078
walking_direction_dtw 210.0 43.989552 27.188144 4.239983 25.985486 36.686731 52.961837 162.955230
speeds_lag 210.0 -1.814286 15.690145 -69.000000 -8.000000 -1.000000 4.000000 62.000000
speeds_base_corr 210.0 0.181225 0.343211 -0.797566 -0.075247 0.192109 0.446230 0.881916
speeds_lagged_corr 210.0 0.490001 0.152821 0.171078 0.372479 0.472339 0.600638 0.881916
speeds_dtw 210.0 35.361913 18.733911 10.222585 23.357122 29.799045 42.693865 141.492438
mean_distance 210.0 2.396365 1.569543 0.336612 1.400366 2.112387 2.974131 13.639054
mean_speed_difference 210.0 0.372175 0.154074 0.086809 0.263861 0.340143 0.447599 0.922073
mean_walking_direction_difference 210.0 62.257972 19.707774 14.479058 48.836197 62.728343 77.249692 120.316045
mean_pace_asymmetry 210.0 0.436723 0.131052 0.110604 0.347882 0.428110 0.505151 0.876306
turn_duration 210.0 10.171429 6.216412 5.000000 5.800000 8.000000 11.950000 46.200000
In [ ]:
# box plot turn duration
sns.boxplot(x=df['turn_duration'])
plt.title('Box plot of turn duration')
plt.show()
No description has been provided for this image
In [ ]:
df['normalized_walking_direction_dtw'] = df['walking_direction_dtw'] / (df['turn_duration'] / 0.2)
df['normalized_speeds_dtw'] = df['speeds_dtw'] / (df['turn_duration'] / 0.2)
In [ ]:
df['abs_walking_direction_lag'] = df['walking_direction_lag'].abs()
df['abs_speeds_lag'] = df['speeds_lag'].abs()
In [ ]:
relevant_features = [
       'turn_duration', 
       'mean_distance',
       'mean_pace_asymmetry',
       'walking_direction_lag', 
       'abs_walking_direction_lag',
       'walking_direction_dtw', 
       'normalized_walking_direction_dtw', 
       # 'walking_direction_base_corr', 
       'walking_direction_lagged_corr', 
       # 'mean_walking_direction_difference', 
       'speeds_lag', 
       'abs_speeds_lag',
       'speeds_dtw', 
       'normalized_speeds_dtw', 
       # 'speeds_base_corr', 
       'speeds_lagged_corr', 
       # 'mean_speed_difference',
       ]
In [ ]:
corr = df[relevant_features].corr(method='pearson',numeric_only=True)
mask = np.abs(corr) < 0.3
plt.figure(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt=".2f", mask=mask)
# make it bigger
plt.title(f"Metrics Correlation Matrix - Original Data (n={len(df)})")
plt.show()
No description has been provided for this image
In [ ]:
# find rows with the same participant_id and path_num with overlapping start_idx and end_idx
df['overlapping'] = False
for index, row in df.iterrows():
    if len(df[(df['participant_id'] == row['participant_id']) & (df['path_num'] == row['path_num']) & (df['start_idx'] >= row['start_idx']) & (df['start_idx'] <= row['end_idx'])]) > 1 or \
        len(df[(df['participant_id'] == row['participant_id']) & (df['path_num'] == row['path_num']) & (df['end_idx'] >= row['start_idx']) & (df['end_idx'] <= row['end_idx'])]) > 1:
        df.at[index, 'overlapping'] = True
    
overlapping_and_not_subset = df[df['overlapping'] == True]
oans = overlapping_and_not_subset

corr_oans = oans[relevant_features].corr(method='pearson',numeric_only=True)
mask = np.abs(corr_oans) < 0.3
plt.figure(figsize=(12, 10))
sns.heatmap(corr_oans, annot=True, fmt=".2f", mask=mask)
plt.title(f"Metrics Correlation Matrix - Overlapping Data (n={len(oans)})")
plt.show()
No description has been provided for this image
In [ ]:
threshold = 0.3
filtered_df = df[(df['walking_direction_lagged_corr'] > threshold) & (df['speeds_lagged_corr'] > threshold)]
filtered_oans = filtered_df[filtered_df['overlapping'] == True]

corr_filtered_oans = filtered_oans[relevant_features].corr(method='pearson',numeric_only=True)
mask = (np.abs(corr_filtered_oans) < 0.3) 
plt.figure(figsize=(12, 10))
sns.heatmap(corr_filtered_oans, annot=True, fmt=".2f", mask=mask)
plt.title(f"Metrics Correlation Matrix - Filtered Overlapping Data (n={len(filtered_oans)})")
plt.show()
No description has been provided for this image
In [ ]:
filtered_oans.describe().T
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 96.0 1822.364583 636.073748 407.000000 2101.750000 2104.000000 2107.000000 2111.000000
path_num 96.0 2.218750 0.728418 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 96.0 2.375000 1.649561 1.000000 1.000000 2.000000 3.000000 8.000000
start_idx 96.0 473.281250 387.076266 54.000000 223.500000 394.500000 579.250000 2129.000000
end_idx 96.0 529.791667 384.969470 108.000000 295.500000 445.500000 638.500000 2179.000000
walking_direction_lag 96.0 -6.208333 22.130197 -79.000000 -17.000000 -2.500000 5.250000 47.000000
walking_direction_base_corr 96.0 0.044735 0.433912 -0.857578 -0.342859 0.079463 0.436595 0.852908
walking_direction_lagged_corr 96.0 0.489776 0.142875 0.307794 0.375805 0.447162 0.588371 0.955078
walking_direction_dtw 96.0 48.883543 27.746676 4.239983 27.611524 45.737083 57.678972 162.955230
speeds_lag 96.0 -3.989583 11.887627 -48.000000 -9.000000 -1.500000 0.250000 23.000000
speeds_base_corr 96.0 0.238395 0.352438 -0.797566 0.048711 0.302736 0.488232 0.839497
speeds_lagged_corr 96.0 0.513716 0.138482 0.301195 0.395598 0.490557 0.615078 0.839497
speeds_dtw 96.0 36.646493 17.578670 10.673369 23.200637 32.107329 47.021830 90.532321
mean_distance 96.0 2.315269 1.362139 0.433665 1.364710 1.992593 3.016074 9.579321
mean_speed_difference 96.0 0.356451 0.132146 0.094192 0.266317 0.336095 0.429151 0.889566
mean_walking_direction_difference 96.0 61.315420 18.263149 14.479058 50.026540 62.640650 74.918968 98.622592
mean_pace_asymmetry 96.0 0.428294 0.099920 0.122233 0.365532 0.425443 0.476916 0.734916
turn_duration 96.0 11.302083 5.717342 5.000000 6.950000 10.100000 13.900000 34.200000
normalized_walking_direction_dtw 96.0 0.897978 0.356410 0.146206 0.644327 0.834471 1.071630 1.960544
normalized_speeds_dtw 96.0 0.675115 0.177642 0.256842 0.558886 0.670667 0.793171 1.377559
abs_walking_direction_lag 96.0 16.291667 16.139469 0.000000 3.750000 12.500000 23.250000 79.000000
abs_speeds_lag 96.0 8.135417 9.514111 0.000000 1.000000 5.000000 12.000000 48.000000
In [ ]:
from scipy.stats import pearsonr

for feature in relevant_features:
    to_display = []
    for feature2 in relevant_features:
        if not feature.startswith(feature2) and not feature2.startswith(feature) and not feature.endswith(feature2) and not feature2.endswith(feature)\
            and np.abs(corr_filtered_oans.loc[feature, feature2]) > 0.3:
            to_display.append(feature2)
    if len(to_display) == 0:
        continue
    # set plot grid of 1xlen(to_display)
    fig, axs = plt.subplots(int(np.ceil(len(to_display)/3)), min(len(to_display),3), figsize=(5*min(len(to_display),3),5*int(np.ceil(len(to_display)/3))))
    # print(axs.shape)
    axs = np.atleast_2d(axs)  # Ensure axs is always an array, even if it's a single subplot
    for i, feature2 in enumerate(to_display):
        peares = pearsonr(filtered_oans[feature], filtered_oans[feature2], alternative='two-sided')
        pcorr, p_val = peares.statistic, peares.pvalue
        CI = peares.confidence_interval(confidence_level=0.95)
        # Scatter plot
        sns.scatterplot(x=feature, y=feature2, data=filtered_oans, ax=axs[int(np.ceil(i/3))-1,i%3])
        # Regression line
        sns.regplot(x=feature, y=feature2, data=filtered_oans, scatter=False, line_kws={'color': 'red'}, ax=axs[int(np.ceil(i/3))-1,i%3])
        axs[int(np.ceil(i/3))-1,i%3].set_title(f"compared with {feature2}\ncorr: {round(corr_filtered_oans.loc[feature, feature2], 3)}, p_val: {round(p_val,5)}, CI: {[round(c,3) for c in CI]}", fontweight='bold')
    # add title "feature vs correlated features" to the plot
    fig.suptitle(f"{feature}'s correlations", fontweight='bold')
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
from PIL import Image
import seaborn as sns
from scipy import stats
# for each feature, find highest and lowest valued row and display them
for feature in relevant_features:
    # print(f"Feature: {feature}")
    # print("Highest values:")
    
    h_res = filtered_oans.loc[filtered_oans[feature].nlargest(1).index, ['participant_id', 'person_robot', 'path_num', 'turn_num', feature]]
    # print(h_res)
    h_base_path = f"./turns/{h_res['participant_id'].values[0]}/{h_res['person_robot'].values[0]}/run_{h_res['path_num'].values[0]}/turn_{h_res['turn_num'].values[0]}/"
    fig, axs = plt.subplots(1, 4, figsize=(20, 5))
    paths_img = Image.open(h_base_path + "paths.png")
    axs[0].imshow(paths_img)
    axs[0].axis('off')
    distance_img = Image.open(h_base_path + "distance.png")
    axs[1].imshow(distance_img)
    axs[1].axis('off')
    walking_directions_img = Image.open(h_base_path + "walking_directions.png")
    axs[2].imshow(walking_directions_img)
    axs[2].axis('off')
    speeds_img = Image.open(h_base_path + "speeds.png")
    axs[3].imshow(speeds_img)
    axs[3].axis('off')
    to_print_dict = {k: round(v_val, 3) if isinstance(v_val, float) else v_val for k,v in h_res.to_dict().items() for v_key, v_val in v.items()}
    to_print_str = ", ".join([f"{k}: {v}" for k,v in to_print_dict.items()])
    fig.suptitle(f"{feature} - highest value\n {to_print_str}", fontweight='bold')
    plt.tight_layout()
    plt.show()
    # print("Lowest values:")
    l_res = filtered_oans.loc[filtered_oans[feature].nsmallest(1).index, ['participant_id', 'person_robot', 'path_num', 'turn_num', feature]]
    # print(l_res)
    l_base_path = f"./turns/{l_res['participant_id'].values[0]}/{l_res['person_robot'].values[0]}/run_{l_res['path_num'].values[0]}/turn_{l_res['turn_num'].values[0]}/"
    fig, axs = plt.subplots(1, 4, figsize=(20, 5))
    paths_img = Image.open(l_base_path + "paths.png")
    axs[0].imshow(paths_img)
    axs[0].axis('off')
    distance_img = Image.open(l_base_path + "distance.png")
    axs[1].imshow(distance_img)
    axs[1].axis('off')
    walking_directions_img = Image.open(l_base_path + "walking_directions.png")
    axs[2].imshow(walking_directions_img)
    axs[2].axis('off')
    speeds_img = Image.open(l_base_path + "speeds.png")
    axs[3].imshow(speeds_img)
    axs[3].axis('off')
    to_print_dict = {k: round(v_val, 3) if isinstance(v_val, float) else v_val for k,v in l_res.to_dict().items() for v_key, v_val in v.items()}
    to_print_str = ", ".join([f"{k}: {v}" for k,v in to_print_dict.items()])
    fig.suptitle(f"{feature} - lowest value\n {to_print_str}", fontweight='bold')
    plt.tight_layout()
    plt.show()
    print("\n\n")
No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


In [ ]:
fig, axs = plt.subplots(int(np.ceil(len(relevant_features)/3)), min(len(relevant_features),3), figsize=(15, 5*int(np.ceil(len(relevant_features)/3))))
axs = np.atleast_2d(axs)  # Ensure axs is always an array, even if it's a single subplot

for i, feature in enumerate(relevant_features):
    sns.histplot(data=filtered_oans, x=feature, kde=True, ax=axs[int(np.ceil(i/3))-1,i%3])
    axs[int(np.ceil(i/3))-1,i%3].set_title(feature)
    axs[int(np.ceil(i/3))-1,i%3].set_xlabel('')
    axs[int(np.ceil(i/3))-1,i%3].set_ylabel('')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
import scipy.stats as stats

fig, axs = plt.subplots(int(np.ceil(len(relevant_features)/3)), min(len(relevant_features),3), figsize=(15, 5*int(np.ceil(len(relevant_features)/3))))
axs = np.atleast_2d(axs)  # Ensure axs is always an array, even if it's a single subplot

for i, feature in enumerate(relevant_features):
    stats.probplot(filtered_oans[feature], dist="norm", plot=axs[int(np.ceil(i/3))-1,i%3])
    axs[int(np.ceil(i/3))-1,i%3].set_title(feature)
    axs[int(np.ceil(i/3))-1,i%3].set_xlabel('Theoretical Quantiles')
    axs[int(np.ceil(i/3))-1,i%3].set_ylabel('Ordered Values')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
fig, axs = plt.subplots(int(np.ceil(len(relevant_features)/3)), min(len(relevant_features),3), figsize=(15, 5*int(np.ceil(len(relevant_features)/3))))
axs = np.atleast_2d(axs)  # Ensure axs is always an array, even if it's a single subplot

for i, feature in enumerate(relevant_features):
    sns.boxplot(data=filtered_oans, y=feature, ax=axs[int(np.ceil(i/3))-1,i%3])
    axs[int(np.ceil(i/3))-1,i%3].set_title(feature)
    axs[int(np.ceil(i/3))-1,i%3].set_xlabel('')
    axs[int(np.ceil(i/3))-1,i%3].set_ylabel('')

plt.tight_layout()
plt.show()
No description has been provided for this image

MISC¶

In [ ]:
stop codon
  Cell In[224], line 1
    stop codon
         ^
SyntaxError: invalid syntax
In [ ]:
import numpy as np
# plot walking_direction_lag and walking_direction_dtw
plt.scatter(abs(filtered_oans['walking_direction_lag']), filtered_oans['walking_direction_dtw'])
plt.xlabel('walking_direction_lag')
plt.ylabel('walking_direction_dtw')
plt.title('Absolute Walking Direction Lag vs DTW')

# fit regression line
x = abs(filtered_oans['walking_direction_lag'])
y = filtered_oans['walking_direction_dtw']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')

plt.show()

# plot speeds_lag and speeds_dtw
plt.scatter(abs(filtered_oans['speeds_lag']), filtered_oans['speeds_dtw'])
plt.xlabel('speeds_lag')
plt.ylabel('speeds_dtw')
plt.title('Absolute Speeds Lag vs DTW')

# fit regression line
x = abs(filtered_oans['speeds_lag'])
y = filtered_oans['speeds_dtw']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')

plt.show()
No description has been provided for this image
No description has been provided for this image
In [ ]:
# plot walking_direction_lag vs all other relevant features
for feature in relevant_features:
    if feature == 'walking_direction_lag':
        continue
    plt.scatter(filtered_oans['walking_direction_lag'], filtered_oans[feature])
    plt.xlabel('walking_direction_lag')
    plt.ylabel(feature)
    plt.title(f'Walking Direction Lag vs {feature}')
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
# plot speeds_lag vs all other relevant features
for feature in relevant_features:
    if feature == 'speeds_lag':
        continue
    plt.scatter(filtered_oans['speeds_lag'], filtered_oans[feature])
    plt.xlabel('speeds_lag')
    plt.ylabel(feature)
    plt.title(f'Speeds Lag vs {feature}')
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
# filter out outliers
filtered_oans = filtered_oans[(filtered_oans['walking_direction_lag'] < 20) & (filtered_oans['walking_direction_lag'] > -20) & (filtered_oans['speeds_lag'] < 20) & (filtered_oans['speeds_lag'] > -20)]

# plot walking direction lag vs speeds lag
plt.scatter(filtered_oans['walking_direction_lag'], filtered_oans['speeds_lag'])
plt.xlabel('walking_direction_lag')
plt.ylabel('speeds_lag')
plt.title('Walking Direction Lag vs Speeds Lag')
# plt.xlim(-20, 20)
# plt.ylim(-20, 20)

# plot regression line
x = filtered_oans['walking_direction_lag']
y = filtered_oans['speeds_lag']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')
plt.show()
No description has been provided for this image
In [ ]:
# plot walking direction dtw vs speeds dtw
plt.scatter(filtered_oans['walking_direction_dtw'], filtered_oans['speeds_dtw'])
plt.xlabel('walking_direction_dtw')
plt.ylabel('speeds_dtw')
plt.title('Walking Direction DTW vs Speeds DTW')


# fit regression line
x = filtered_oans['walking_direction_dtw']
y = filtered_oans['speeds_dtw']
coefficients = np.polyfit(x, y, 1)
regression_line = np.polyval(coefficients, x)
plt.plot(x, regression_line, color='red')
plt.show()
No description has been provided for this image
In [ ]:
df[df['walking_direction_lagged_corr'] > df['walking_direction_base_corr']].sort_values(by='walking_direction_lag', ascending=False)
Out[ ]:
participant_id path_num person_robot turn_num start_idx end_idx walking_direction_lag walking_direction_base_corr walking_direction_lagged_corr walking_direction_dtw ... speeds_dtw mean_distance mean_speed_difference mean_walking_direction_difference turn_duration normalized_walking_direction_dtw normalized_speeds_dtw abs_walking_direction_lag abs_speeds_lag overlapping
202 2111 2 person 2 507 604 47 -0.424161 0.337934 103.482527 ... 65.295422 1.392034 0.250340 57.612406 19.4 1.066830 0.673149 47 6 True
80 2103 2 robot 2 578 634 43 -0.249106 0.616893 61.422126 ... 25.631941 2.989126 0.338179 47.827222 11.2 1.096824 0.457713 43 3 True
181 2108 3 robot 3 528 592 42 0.030613 0.248076 73.712595 ... 47.000078 2.071863 0.453484 70.369095 12.8 1.151759 0.734376 42 22 True
140 2106 3 robot 9 1990 2049 37 -0.272773 0.455208 51.252940 ... 61.127953 2.010555 0.531695 60.823225 11.8 0.868694 1.036067 37 39 False
175 2108 2 robot 5 708 760 33 -0.593129 0.397421 87.662114 ... 54.166806 2.526451 0.815135 67.273508 10.4 1.685810 1.041669 33 21 False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
118 2106 1 robot 1 395 479 -41 -0.549742 0.596739 101.277338 ... 36.920811 2.505176 0.363976 60.501683 16.8 1.205683 0.439533 41 11 True
25 408 2 robot 1 394 450 -47 0.427449 0.433015 53.091645 ... 35.825730 3.623730 0.178786 50.134179 11.2 0.948065 0.639745 47 0 True
197 2109 3 robot 3 244 333 -51 -0.364310 0.376058 79.085565 ... 62.508268 2.410478 0.621798 86.824998 17.8 0.888602 0.702340 51 18 True
192 2109 2 robot 4 431 500 -59 0.225661 0.297712 65.438534 ... 32.955370 1.050741 0.305188 92.699869 13.8 0.948385 0.477614 59 3 True
70 2102 3 robot 5 1145 1237 -79 -0.421383 0.470861 125.077491 ... 43.254521 1.194304 0.334011 70.750520 18.4 1.359538 0.470158 79 7 True

176 rows × 23 columns

In [ ]:
print(df.dtypes)
participant_id                         int64
path_num                               int64
person_robot                          object
turn_num                               int64
start_idx                              int64
end_idx                                int64
walking_direction_lag                  int64
walking_direction_base_corr          float64
walking_direction_lagged_corr        float64
walking_direction_dtw                float64
speeds_lag                             int64
speeds_base_corr                     float64
speeds_lagged_corr                   float64
speeds_dtw                           float64
mean_distance                        float64
mean_speed_difference                float64
mean_walking_direction_difference    float64
turn_duration                        float64
normalized_walking_direction_dtw     float64
normalized_speeds_dtw                float64
abs_walking_direction_lag              int64
abs_speeds_lag                         int64
overlapping                             bool
dtype: object
In [ ]:
oans.corr
Out[ ]:
<bound method DataFrame.corr of      participant_id  path_num person_robot  turn_num  start_idx  end_idx  \
1               407         1        robot         1        232      300   
2               407         2       person         1         80      108   
4               407         2       person         3        400      431   
5               407         2       person         4        649      686   
6               407         2        robot         1         82      141   
..              ...       ...          ...       ...        ...      ...   
197            2109         3        robot         3        244      333   
198            2111         1       person         1        314      373   
200            2111         1        robot         2        307      363   
202            2111         2       person         2        507      604   
208            2111         3        robot         1         87      167   

     walking_direction_lag  walking_direction_base_corr  \
1                        0                     0.732906   
2                      -14                    -0.857578   
4                        0                     0.679583   
5                       -2                     0.506926   
6                      -18                    -0.490586   
..                     ...                          ...   
197                    -51                    -0.364310   
198                     -2                     0.578583   
200                    -12                     0.503671   
202                     47                    -0.424161   
208                     -9                    -0.073150   

     walking_direction_lagged_corr  walking_direction_dtw  ...  speeds_dtw  \
1                         0.732906              34.565064  ...   38.493228   
2                         0.444096              54.895241  ...   20.858506   
4                         0.679583              20.961050  ...   10.673369   
5                         0.517505              27.650007  ...   37.784558   
6                         0.317564              83.819802  ...   48.835245   
..                             ...                    ...  ...         ...   
197                       0.376058              79.085565  ...   62.508268   
198                       0.615375              24.827749  ...   28.068922   
200                       0.641915              27.348872  ...   34.160874   
202                       0.337934             103.482527  ...   65.295422   
208                       0.444889              53.814038  ...   49.325597   

     mean_distance  mean_speed_difference  mean_walking_direction_difference  \
1         1.844983               0.318515                          60.896701   
2         2.682582               0.469750                          43.091946   
4         1.857478               0.301516                          37.465756   
5         1.766503               0.331568                          65.157930   
6         3.421456               0.687661                          55.353376   
..             ...                    ...                                ...   
197       2.410478               0.621798                          86.824998   
198       1.043703               0.249136                          68.121790   
200       1.039405               0.295089                          71.104648   
202       1.392034               0.250340                          57.612406   
208       1.376935               0.280625                          44.422179   

     turn_duration  normalized_walking_direction_dtw  normalized_speeds_dtw  \
1             13.6                          0.508310               0.566077   
2              5.6                          1.960544               0.744947   
4              6.2                          0.676163               0.344302   
5              7.4                          0.747297               1.021204   
6             11.8                          1.420675               0.827716   
..             ...                               ...                    ...   
197           17.8                          0.888602               0.702340   
198           11.8                          0.420809               0.475744   
200           11.2                          0.488373               0.610016   
202           19.4                          1.066830               0.673149   
208           16.0                          0.672675               0.616570   

     abs_walking_direction_lag  abs_speeds_lag  overlapping  
1                            0               5         True  
2                           14               1         True  
4                            0               4         True  
5                            2              10         True  
6                           18              28         True  
..                         ...             ...          ...  
197                         51              18         True  
198                          2               0         True  
200                         12               0         True  
202                         47               6         True  
208                          9               4         True  

[109 rows x 23 columns]>
In [ ]:
count = len(df[df['walking_direction_lagged_corr'] > 0.5])
print(count)
79
In [ ]:
count = len(df[df['speeds_lagged_corr'] > 0.5])
print(count)
80
In [ ]:
df[df['walking_direction_lagged_corr'] == df['walking_direction_base_corr']].sort_values(by='walking_direction_lag', ascending=False).head(15)
Out[ ]:
participant_id path_num person_robot turn_num start_idx end_idx walking_direction_lag walking_direction_base_corr walking_direction_lagged_corr walking_direction_dtw ... speeds_dtw mean_distance mean_speed_difference mean_walking_direction_difference turn_duration normalized_walking_direction_dtw normalized_speeds_dtw abs_walking_direction_lag abs_speeds_lag overlapping
0 407 1 person 1 235 261 0 0.806176 0.806176 8.140371 ... 21.516657 2.706603 0.250385 73.832481 5.2 0.313091 0.827564 0 23 False
1 407 1 robot 1 232 300 0 0.732906 0.732906 34.565064 ... 38.493228 1.844983 0.318515 60.896701 13.6 0.508310 0.566077 0 5 True
206 2111 3 person 2 219 244 0 0.941918 0.941918 6.809205 ... 19.813179 0.502552 0.358440 82.283013 5.0 0.272368 0.792527 0 2 False
188 2109 2 person 5 427 483 0 0.526391 0.526391 33.637688 ... 28.478260 0.636772 0.273464 92.401208 11.2 0.600673 0.508540 0 1 True
171 2108 2 robot 1 269 331 0 0.325454 0.325454 59.084370 ... 62.017733 2.211453 0.915065 64.867664 12.4 0.952974 1.000286 0 22 False
165 2108 1 robot 2 436 464 0 0.863591 0.863591 12.509304 ... 28.906489 3.805783 0.221571 68.057128 5.6 0.446761 1.032375 0 3 False
148 2107 2 person 5 757 807 0 0.378086 0.378086 29.704120 ... 38.924311 3.368980 0.273900 53.103047 10.0 0.594082 0.778486 0 3 False
144 2107 2 person 1 333 362 0 0.716510 0.716510 14.198641 ... 13.912627 1.472380 0.263517 39.911269 5.8 0.489608 0.479746 0 0 True
102 2105 2 person 1 378 407 0 0.459780 0.459780 23.294339 ... 24.668994 2.047301 0.234966 55.996426 5.8 0.803253 0.850655 0 23 False
82 2103 2 robot 4 1064 1128 0 0.650376 0.650376 36.539870 ... 36.712230 0.871589 0.339594 71.137158 12.8 0.570935 0.573629 0 29 True
68 2102 3 robot 3 612 643 0 0.417467 0.417467 28.562510 ... 29.803919 2.785083 0.598161 88.469059 6.2 0.921371 0.961417 0 13 True
42 2101 2 person 3 1064 1093 0 0.781565 0.781565 15.187169 ... 10.358754 2.684810 0.164138 45.262136 5.8 0.523695 0.357198 0 0 False
33 2101 1 person 1 61 86 0 0.293240 0.293240 26.936113 ... 23.890237 0.902699 0.218320 24.226264 5.0 1.077445 0.955609 0 5 False
21 408 2 person 1 389 433 0 0.786175 0.786175 17.890734 ... 24.423989 3.531791 0.182818 52.813454 8.8 0.406608 0.555091 0 1 True
15 407 3 robot 2 250 312 0 0.621034 0.621034 45.833620 ... 50.858723 2.147284 0.455274 37.557363 12.4 0.739252 0.820302 0 1 True

15 rows × 23 columns

In [ ]:
df.where(df['speeds_lagged_corr'] > 0.5).sort_values(by='speeds_lagged_corr', ascending=False).dropna().head(15)
Out[ ]:
participant_id path_num person_robot turn_num start_idx end_idx walking_direction_lag walking_direction_base_corr walking_direction_lagged_corr walking_direction_dtw ... speeds_dtw mean_distance mean_speed_difference mean_walking_direction_difference turn_duration normalized_walking_direction_dtw normalized_speeds_dtw abs_walking_direction_lag abs_speeds_lag overlapping
42 2101.0 2.0 person 3.0 1064.0 1093.0 0.0 0.781565 0.781565 15.187169 ... 10.358754 2.684810 0.164138 45.262136 5.8 0.523695 0.357198 0.0 0.0 False
104 2105.0 2.0 person 3.0 749.0 816.0 -23.0 0.013932 0.509513 42.612579 ... 21.281903 0.474692 0.124701 56.562109 13.4 0.636009 0.317640 23.0 0.0 False
189 2109.0 2.0 robot 1.0 70.0 120.0 -17.0 -0.057183 0.673054 47.122442 ... 20.004949 1.724681 0.230480 53.237857 10.0 0.942449 0.400099 17.0 0.0 True
184 2109.0 2.0 person 1.0 72.0 110.0 -17.0 -0.326919 0.505911 46.229325 ... 15.671327 1.816075 0.247447 60.916486 7.6 1.216561 0.412403 17.0 0.0 False
120 2106.0 2.0 person 2.0 457.0 482.0 -9.0 0.215447 0.722022 19.120818 ... 11.133530 2.420559 0.479240 26.041330 5.0 0.764833 0.445341 9.0 2.0 False
147 2107.0 2.0 person 4.0 520.0 549.0 -10.0 0.154518 0.524378 21.969941 ... 17.870706 1.796626 0.384621 32.503747 5.8 0.757584 0.616231 10.0 9.0 True
97 2104.0 2.0 robot 1.0 60.0 110.0 21.0 -0.320415 0.334304 47.723231 ... 27.773751 0.757656 0.151960 38.997632 10.0 0.954465 0.555475 21.0 0.0 True
95 2104.0 1.0 robot 1.0 776.0 802.0 2.0 0.810848 0.838740 7.370475 ... 11.771324 1.284899 0.354692 38.198527 5.2 0.283480 0.452743 2.0 2.0 False
144 2107.0 2.0 person 1.0 333.0 362.0 0.0 0.716510 0.716510 14.198641 ... 13.912627 1.472380 0.263517 39.911269 5.8 0.489608 0.479746 0.0 0.0 True
13 407.0 3.0 person 3.0 317.0 342.0 13.0 -0.652219 0.426496 36.536157 ... 10.451860 1.143140 0.202000 44.318782 5.0 1.461446 0.418074 13.0 0.0 False
28 408.0 3.0 person 2.0 162.0 191.0 -25.0 -0.339538 0.299112 36.922039 ... 15.447943 3.337911 0.163138 78.229208 5.8 1.273174 0.532688 25.0 1.0 True
56 2102.0 2.0 person 2.0 276.0 302.0 1.0 0.296399 0.358437 27.424802 ... 15.150204 1.579503 0.211423 23.269520 5.2 1.054800 0.582700 1.0 0.0 True
191 2109.0 2.0 robot 3.0 302.0 367.0 -9.0 0.222554 0.519508 48.858096 ... 22.949901 1.654095 0.252631 69.199313 13.0 0.751663 0.353075 9.0 4.0 True
187 2109.0 2.0 person 4.0 291.0 362.0 -9.0 0.210021 0.467147 53.219390 ... 18.235794 1.805430 0.227338 68.023336 14.2 0.749569 0.256842 9.0 2.0 True
40 2101.0 2.0 person 1.0 490.0 527.0 -26.0 -0.304902 0.411042 57.569335 ... 21.816757 2.015769 0.244919 53.535738 7.4 1.555928 0.589642 26.0 6.0 True

15 rows × 23 columns

In [ ]:
corr[corr > 0.3]
Out[ ]:
turn_duration mean_distance walking_direction_lag abs_walking_direction_lag walking_direction_dtw normalized_walking_direction_dtw walking_direction_lagged_corr speeds_lag abs_speeds_lag speeds_dtw normalized_speeds_dtw speeds_lagged_corr
turn_duration 1.000000 NaN NaN 0.486517 0.727171 NaN NaN NaN NaN 0.761391 NaN NaN
mean_distance NaN 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
walking_direction_lag NaN NaN 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN
abs_walking_direction_lag 0.486517 NaN NaN 1.000000 0.753330 0.439396 NaN NaN NaN 0.335844 NaN NaN
walking_direction_dtw 0.727171 NaN NaN 0.753330 1.000000 0.512962 NaN NaN NaN 0.548151 NaN NaN
normalized_walking_direction_dtw NaN NaN NaN 0.439396 0.512962 1.000000 NaN NaN NaN NaN NaN NaN
walking_direction_lagged_corr NaN NaN NaN NaN NaN NaN 1.0 NaN NaN NaN NaN NaN
speeds_lag NaN NaN NaN NaN NaN NaN NaN 1.0 NaN NaN NaN NaN
abs_speeds_lag NaN NaN NaN NaN NaN NaN NaN NaN 1.000000 0.544645 0.447434 NaN
speeds_dtw 0.761391 NaN NaN 0.335844 0.548151 NaN NaN NaN 0.544645 1.000000 0.327886 NaN
normalized_speeds_dtw NaN NaN NaN NaN NaN NaN NaN NaN 0.447434 0.327886 1.000000 NaN
speeds_lagged_corr NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.0
In [ ]:
rel.describe()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/Users/yoav.sc/Desktop/GPS Data Analysis/analyze_turns_data.ipynb Cell 27 line 1
----> <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X35sZmlsZQ%3D%3D?line=0'>1</a> rel.describe()

NameError: name 'rel' is not defined
In [ ]:
for feature in relevant_features:
    sns.histplot(data=rel, x=feature, kde=True)
    plt.show()
    # sns.boxplot(data=rel, x=feature)
    # plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/Users/yoav.sc/Desktop/GPS Data Analysis/analyze_turns_data.ipynb Cell 28 line 2
      <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=0'>1</a> for feature in relevant_features:
----> <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=1'>2</a>     sns.histplot(data=rel, x=feature, kde=True)
      <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=2'>3</a>     plt.show()
      <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=3'>4</a>     # sns.boxplot(data=rel, x=feature)
      <a href='vscode-notebook-cell:/Users/yoav.sc/Desktop/GPS%20Data%20Analysis/analyze_turns_data.ipynb#X36sZmlsZQ%3D%3D?line=4'>5</a>     # plt.show()

NameError: name 'rel' is not defined
In [ ]:
# display paths of turns with highest and lowest correlation
# highest correlation
highest_corr = rel['speeds_lagged_corr'].idxmax()
print("Highest correlation (Speed):")
print(df.iloc[highest_corr])
highest_corr = rel['walking_direction_lagged_corr'].idxmax()
print("Highest correlation (Walking direction):")
print(df.iloc[highest_corr])
# lowest correlation
lowest_corr = rel['speeds_lagged_corr'].idxmin()
print("Lowest correlation (Speed):")
print(df.iloc[lowest_corr])
lowest_corr = rel['walking_direction_lagged_corr'].idxmin()
print("Lowest correlation (Walking direction):")
print(df.iloc[lowest_corr])
Highest correlation (Speed):
participant_id                            2101
path_num                                     3
person_robot                            person
turn_num                                     1
start_idx                                  358
end_idx                                    384
walking_direction_lag                        1
walking_direction_base_corr           0.577738
walking_direction_lagged_corr         0.672207
walking_direction_dtw                20.737426
speeds_lag                                 -16
speeds_base_corr                     -0.363852
speeds_lagged_corr                    0.470437
speeds_dtw                           25.559312
mean_distance                         4.330081
mean_speed_difference                 0.436231
mean_walking_direction_difference    27.799034
turn_duration                              5.2
normalized_walking_direction_dtw      3.987967
normalized_speeds_dtw                 4.915252
abs_walking_direction_lag                    1
abs_speeds_lag                              16
overlapping                               True
Name: 45, dtype: object
Highest correlation (Walking direction):
participant_id                            2105
path_num                                     2
person_robot                             robot
turn_num                                     4
start_idx                                 1207
end_idx                                   1255
walking_direction_lag                        4
walking_direction_base_corr           0.485074
walking_direction_lagged_corr         0.508153
walking_direction_dtw                39.062309
speeds_lag                                 -23
speeds_base_corr                     -0.071304
speeds_lagged_corr                    0.347287
speeds_dtw                           36.752992
mean_distance                         2.585128
mean_speed_difference                 0.319521
mean_walking_direction_difference    46.112376
turn_duration                              9.6
normalized_walking_direction_dtw      4.068991
normalized_speeds_dtw                 3.828437
abs_walking_direction_lag                    4
abs_speeds_lag                              23
overlapping                              False
Name: 110, dtype: object
Lowest correlation (Speed):
participant_id                            2102
path_num                                     1
person_robot                             robot
turn_num                                     2
start_idx                                  287
end_idx                                    319
walking_direction_lag                        7
walking_direction_base_corr          -0.324347
walking_direction_lagged_corr         0.401246
walking_direction_dtw                25.914037
speeds_lag                                   1
speeds_base_corr                      0.435508
speeds_lagged_corr                    0.460162
speeds_dtw                           32.683168
mean_distance                         5.296994
mean_speed_difference                 0.820219
mean_walking_direction_difference    41.311229
turn_duration                              6.4
normalized_walking_direction_dtw      4.049068
normalized_speeds_dtw                 5.106745
abs_walking_direction_lag                    7
abs_speeds_lag                               1
overlapping                              False
Name: 53, dtype: object
Lowest correlation (Walking direction):
participant_id                            2101
path_num                                     1
person_robot                             robot
turn_num                                     1
start_idx                                  583
end_idx                                    650
walking_direction_lag                        8
walking_direction_base_corr           0.414258
walking_direction_lagged_corr         0.510496
walking_direction_dtw                  31.8583
speeds_lag                                   4
speeds_base_corr                      0.329163
speeds_lagged_corr                    0.384854
speeds_dtw                           40.768658
mean_distance                         4.009144
mean_speed_difference                 0.220985
mean_walking_direction_difference    69.468379
turn_duration                             13.4
normalized_walking_direction_dtw      2.377485
normalized_speeds_dtw                 3.042437
abs_walking_direction_lag                    8
abs_speeds_lag                               4
overlapping                               True
Name: 38, dtype: object
In [ ]:
threshold = 0.5
filtered_df = df[(df['walking_direction_lagged_corr'] > threshold) & (df['speeds_lagged_corr'] > threshold)]
In [ ]:
len(filtered_df)
Out[ ]:
39
In [ ]:
filtered_df.describe().T
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 39.0 1757.384615 694.839119 407.000000 2101.500000 2105.000000 2106.500000 2111.000000
path_num 39.0 2.205128 0.656124 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 39.0 2.538462 1.958062 1.000000 1.000000 2.000000 3.000000 9.000000
start_idx 39.0 543.923077 496.897182 70.000000 238.500000 400.000000 661.000000 2199.000000
end_idx 39.0 582.615385 496.691984 110.000000 277.500000 431.000000 689.000000 2225.000000
walking_direction_lag 39.0 -0.897436 10.651896 -23.000000 -5.000000 0.000000 1.500000 43.000000
walking_direction_base_corr 39.0 0.377439 0.350161 -0.497956 0.155233 0.450423 0.636021 0.866582
walking_direction_lagged_corr 39.0 0.639285 0.123473 0.505911 0.524876 0.613507 0.715917 0.955078
walking_direction_dtw 39.0 26.079023 13.194644 4.239983 18.846778 21.961237 32.353181 61.422126
speeds_lag 39.0 -2.230769 6.322314 -20.000000 -5.500000 -2.000000 0.000000 18.000000
speeds_base_corr 39.0 0.363470 0.348324 -0.407370 0.155902 0.469160 0.592340 0.881916
speeds_lagged_corr 39.0 0.650329 0.107183 0.514895 0.559972 0.628263 0.707393 0.881916
speeds_dtw 39.0 23.657810 8.449252 10.222585 17.453590 23.802687 28.914904 42.930466
mean_distance 39.0 2.134111 2.067214 0.433665 1.288185 1.775110 2.303315 13.639054
mean_speed_difference 39.0 0.334382 0.104774 0.124701 0.261519 0.326160 0.403904 0.538114
mean_walking_direction_difference 39.0 51.980670 19.681307 14.479058 39.022264 50.174653 61.618709 100.850062
turn_duration 39.0 7.738462 2.589515 5.000000 5.800000 7.000000 9.700000 13.600000
normalized_walking_direction_dtw 39.0 3.334042 1.168407 0.731032 2.885781 3.330639 3.800419 6.082806
normalized_speeds_dtw 39.0 3.154247 0.975970 1.588202 2.311082 3.122505 3.858179 5.189416
abs_walking_direction_lag 39.0 6.282051 8.589923 0.000000 1.000000 2.000000 9.000000 43.000000
abs_speeds_lag 39.0 4.589744 4.843433 0.000000 1.000000 3.000000 7.000000 20.000000
In [ ]:
# plot correlation matrix
corr = filtered_df[relevant_features].corr(method='pearson',numeric_only=True)
# plt.figure(figsize=(10,10))
sns.heatmap(corr, annot=True, fmt=".2f")
plt.show()
No description has been provided for this image
In [ ]:
corr[abs(corr) > 0.3]
Out[ ]:
turn_duration mean_distance walking_direction_lag abs_walking_direction_lag walking_direction_dtw normalized_walking_direction_dtw walking_direction_lagged_corr speeds_lag abs_speeds_lag speeds_dtw normalized_speeds_dtw speeds_lagged_corr
turn_duration 1.000000 NaN NaN 0.349332 0.709851 NaN NaN NaN NaN 0.530405 -0.305060 NaN
mean_distance NaN 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
walking_direction_lag NaN NaN 1.000000 NaN NaN NaN NaN NaN NaN NaN NaN -0.451192
abs_walking_direction_lag 0.349332 NaN NaN 1.000000 0.729586 0.608240 -0.320435 NaN NaN NaN NaN NaN
walking_direction_dtw 0.709851 NaN NaN 0.729586 1.000000 0.740449 -0.591417 NaN NaN NaN NaN NaN
normalized_walking_direction_dtw NaN NaN NaN 0.608240 0.740449 1.000000 -0.736542 NaN NaN NaN NaN NaN
walking_direction_lagged_corr NaN NaN NaN -0.320435 -0.591417 -0.736542 1.000000 NaN -0.336904 NaN NaN NaN
speeds_lag NaN NaN NaN NaN NaN NaN NaN 1.0 NaN NaN NaN NaN
abs_speeds_lag NaN NaN NaN NaN NaN NaN -0.336904 NaN 1.000000 0.489237 0.558836 -0.300640
speeds_dtw 0.530405 NaN NaN NaN NaN NaN NaN NaN 0.489237 1.000000 0.615769 -0.446811
normalized_speeds_dtw -0.305060 NaN NaN NaN NaN NaN NaN NaN 0.558836 0.615769 1.000000 -0.516680
speeds_lagged_corr NaN NaN -0.451192 NaN NaN NaN NaN NaN -0.300640 -0.446811 -0.516680 1.000000
In [ ]:
for feature in relevant_features:
    sns.histplot(data=filtered_df, x=feature, kde=True)
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image